Creating dataset:
Now let’s proceed as in mash, selecting the ‘maxes’ from a set of the top 1000:
z.stat=read.table("z.matched.txt",header = TRUE)
v.j=matrix(rep(1,ncol(z.stat)*nrow(z.stat)),ncol=16,nrow=nrow(z.stat))
max.z=cbind(z.stat,maxz=apply(z.stat,1,function(x){max(abs(x))}))
max.z.sort=max.z[order(max.z[,"maxz"],decreasing=T),]
###use these strongest 1000 to build covariance matrices
maxes=max.z.sort[1:1000,-17]
image(cor(maxes))
write.table(maxes,"maxz.txt",col.names=FALSE,row.names=FALSE)
Now run SFA:
#system('/Users/sarahurbut/miniconda3/bin/sfa -gen ./maxz.txt -g 1000 -n 16 -o consortiumz i -k 5')
library('mash')
factor.mat=as.matrix(read.table("consortiumz_F.out"))
lambda.mat=as.matrix(read.table("consortiumz_lambda.out"))
library('mash')
library('ExtremeDeconvolution')
max.z=read.table("maxz.txt")
max.v=matrix(rep(1,ncol(max.z)*nrow(max.z)),ncol=16,nrow=nrow(max.z))
dim(max.z)
ms=deconvolution.em.with.bovy(max.z,factor.mat,max.v,lambda.mat,K=3,P=3)
saveRDS(ms,"maxstepbovy.rds")
Now we want to compute the covariance matrices using the max step:
ms=readRDS("maxstepbovy.rds")
A="consortium"
covmash=compute.hm.covmat.all.max.step(max.step = ms,b.hat = z.stat,se.hat = v.j,t.stat = max.z,Q = 5,lambda.mat = lambda.mat,A = "test",factor.mat = factor.mat,zero = T,power = 2)$covmat
set.seed(123)
index=sample(1:nrow(z.stat),50000,replace=F)
write.table(index,"index.txt")
index=read.table("index.txt")[,1]
train.t=z.stat[index,]
se.train=v.j[index,]
compute.hm.train.log.lik.pen(train.b = train.t,se.train = se.train,covmat = covmash,A=A,pen=1)
Let’s examine the patterns of covariance and the hierarchical weights and any patterns of tissue specificity.
Let’s look at the covariance patterns captured.
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] 0.4838265
## [1] 0.4838265
## [1] 0.5161735
## [1] 0.5161735
We can see that 0.5936194 proportion of the prior weight is assigned to the learned matrices. Looks good!
Do we see patterns of specificity? It appears that specificity to one subgroup is very rare, since the correlation structure is so rich. Of note, schizophrenia and height stand out. Let’s look at tissue specific:
How many associations do we call significant at an LFSR of 0.05? 338655 which is 0.0257768 part of the data. When we plot t-spec by subgroup, we see that subgroup specific effects are rare and of small magnitude because of the sharing among tissues for a given fold change difference.
What about in ash?
#ashmeans=read.table("univariateash.pm.txt")
ashlfsr=read.table('univariateash.lfsr.txt')
#dim(ashmeans)
dim(ashlfsr)
## [1] 821124 16
sum(ashlfsr<0.05)
## [1] 82384
mean(ashlfsr<0.05)
## [1] 0.006270673
colnames(lfsr.mash)=names
rownames(lfsr.mash)=rownames(z.stat)
dist=lfsr.mash<0.05;
lapply(seq(1:ncol(lfsr.mash)),function(x){a=rownames(lfsr.mash)[which(rowSums(dist)==1&lfsr.mash[,x]<0.05)];write.table(a,paste0("../Data/traitspec/traitspec",colnames(lfsr.mash)[x],".txt"))})
lapply(seq(1:ncol(lfsr.mash)),function(x){a=rownames(lfsr.mash)[which(lfsr.mash[,x]<0.05)];write.table(a,paste0("../Data/significant/",colnames(lfsr.mash)[x],"significantin.txt"))})
As an example confirmation, https://www.snpedia.com/index.php/Rs1042725 found nature SNP rs1042725 is associated with height (P = 4E-8) in a study involving over 20,000 individuals.rs6060369 was also confirmed.
Let’s see how many SNPS per trait:
barplot(colSums(lfsr.mash<0.05),main="Number of Snps Significant at LFSR<0.05")
Let’s generate the top SNPs per trait:
colnames(lfsr.mash)=colnames(z.stat)
rownames(lfsr.mash)=rownames(z.stat)
data.frame(apply(lfsr.mash,2,function(x){(rownames(lfsr.mash)[order(x,decreasing=F)][1:10])}))
## reprogen_aam_beta pgc_scz_beta pgc_mdd_beta pgc_bip_beta pgc_as_beta
## 1 rs4976665 rs10484439 rs10182181 rs4382763 rs1269195
## 2 rs11739147 rs13198716 rs6752378 rs12075079 rs9375450
## 3 rs734518 rs7746199 rs6545814 rs10489290 rs2184968
## 4 rs17355209 rs13195040 rs6721750 rs4072117 rs2152876
## 5 rs1472920 rs10484399 rs2384058 rs12402454 rs4559102
## 6 rs2962842 rs17749927 rs13407913 rs10489289 rs4549631
## 7 rs2940521 rs17750424 rs713587 rs16844255 rs7738836
## 8 rs2860534 rs17693963 rs7567997 rs12911223 rs1538956
## 9 rs3923879 rs13199772 rs2033654 rs7296288 rs1340952
## 10 rs7718874 rs13197574 rs10198275 rs2135551 rs3861455
## pgc_an_beta magic_fg_beta ibdgenetics_uc_beta ibdgenetics_ibd_beta
## 1 rs724016 rs2908282 rs3812558 rs6556412
## 2 rs9846396 rs10830961 rs10753575 rs3812558
## 3 rs12969709 rs477224 rs6426833 rs4487900
## 4 rs1344674 rs12475700 rs10737482 rs12413565
## 5 rs11662368 rs1402837 rs7553638 rs1468889
## 6 rs6763931 rs486981 rs4654903 rs6656890
## 7 rs6764769 rs508506 rs11805303 rs7515029
## 8 rs12964203 rs494874 rs1343151 rs4655679
## 9 rs1539952 rs2685805 rs11465817 rs10789224
## 10 rs6440003 rs6709087 rs10889677 rs11209008
## ibdgenetics_cd_beta giant_height_beta giant_bmi_beta gefos_ls_beta
## 1 rs6556412 rs1571466 rs879048 rs6469792
## 2 rs3812558 rs3116228 rs7193144 rs7010043
## 3 rs4487900 rs3100596 rs11664327 rs6469795
## 4 rs1468889 rs1078968 rs1893512 rs9650075
## 5 rs7515029 rs6763931 rs2568958 rs4424291
## 6 rs4655679 rs11711375 rs11209951 rs7016585
## 7 rs10789224 rs11725731 rs630372 rs6469804
## 8 rs11209005 rs17427971 rs543874 rs10955924
## 9 rs11209008 rs7684769 rs10913469 rs9594738
## 10 rs6688383 rs951252 rs2867105 rs7014574
## gefos_fn_beta gefos_fa_beta angst_anx_beta
## 1 rs6894139 rs3779381 rs12969709
## 2 rs10037512 rs3801387 rs11662368
## 3 rs1366594 rs917727 rs1539952
## 4 rs1346452 rs718766 rs12964203
## 5 rs4916669 rs7776725 rs6567155
## 6 rs984399 rs2254595 rs12957347
## 7 rs7728690 rs6965592 rs12970134
## 8 rs4916666 rs6894139 rs11584383
## 9 rs1159666 rs10260002 rs9846396
## 10 rs13436401 rs1366594 rs1150754
write.table(data.frame(apply(lfsr.mash,2,function(x){(rownames(lfsr.mash)[order(x,decreasing=F)][1:10])}))
,"top10pertrait.txt")
Let’s show how this integrates with the GTEX results:
topten=read.table("top10pertrait.txt",header=T,stringsAsFactors = F)
topten[1,"scz"]
## [1] "rs10484439"
Is an eQTL in Brain, Frontal Cortex as shown here in the GteX browser representing a possible link to disease.
Now, we’d like to add pairwise sharing by magnitude and sign:
library("mashr")
se.matched=as.matrix(read.table("se.matched.txt"))
pm.mash.beta=post.means*se.matched
colnames(pm.mash.beta)=names
lfsr.mash.sig=lfsr.mash[rowSums(lfsr.mash<0.05)>0,]##only 137,223 are significant in at least one subgroup
pm.mash.sig=pm.mash.beta[rowSums(lfsr.mash<0.05)>0,]
signheatmap=compute.sharing.by.sign(lfsr.mash = lfsr.mash.sig,thresh = 0.05,pm.mash.beta = pm.mash.sig)
signheatmap[lower.tri(signheatmap)] <- NA
magheatmap=compute.mag.by.sharing(lfsr.mash = lfsr.mash.sig,thresh = 0.05,pm.mash.beta = pm.mash.sig)
magheatmap[lower.tri(magheatmap)] <- NA
library('colorRamps')
library('corrplot')
library(gplots)
library(ggplot2)
class(signheatmap)
## [1] "matrix"
heatmap.2(signheatmap,Rowv=FALSE,Colv=FALSE,
symm=TRUE,dendrogram="none",density="none",trace="none",#col=redblue,
col=blue2green,main=paste0("Pairwise Sharing by Sign"),
cexRow=0.6,cexCol=0.5,cex.main=0.5,breaks=seq(0.7,1,0.01))
library('lattice')
clrs <- colorRampPalette(rev(c("#D73027","#FC8D59","#FEE090","#FFFFBF",
"#E0F3F8","#91BFDB","#4575B4")))(64)
#clrs[63:64] <- "darkviolet"
lat=signheatmap
lat[lower.tri(lat)] <- NA
print(levelplot(lat,col.regions = clrs,xlab = "",ylab = "",colorkey = TRUE,title(main="PairwiseSharingBySign")))
heatmap.2(magheatmap,Rowv=FALSE,Colv=FALSE,
symm=TRUE,dendrogram="none",density="none",trace="none",#col=redblue,
col=blue2green,main=paste0("Pairwise Sharing by Magnitude"),
cexRow=0.6,cexCol=2,cex.main=0.5,#breaks=seq(0.35,1,0.01),
labCol=NA)
library('lattice')
clrs <- colorRampPalette(rev(c("#D73027","#FC8D59","#FEE090","#FFFFBF",
"#E0F3F8","#91BFDB","#4575B4")))(64)
#clrs[63:64] <- "darkviolet"
absmagheatmap=compute.mag.by.sharing(lfsr.mash = lfsr.mash.sig,thresh = 0.05,pm.mash.beta = abs(pm.mash.sig))
absmagheatmap[lower.tri(absmagheatmap)] <- NA
lat=absmagheatmap
lat[lower.tri(lat)] <- NA
print(levelplot(lat,col.regions = clrs,xlab = "",ylab = "",colorkey = TRUE,title(main="PairwiseSharingByMagnitude")))
Now, let’s look at sharing my asbolute value of magntiude:
heatmap.2(signheatmap,Rowv=FALSE,Colv=FALSE,
symm=TRUE,dendrogram="none",density="none",trace="none",#col=redblue,
col=blue2green,main=paste0("Pairwise Sharing by Sign"),
cexRow=0.6,cexCol=0.5,cex.main=0.5)
#,breaks=seq(0.7,1,0.01))
heatmap.2(1-signheatmap,Rowv=FALSE,Colv=FALSE,
symm=TRUE,dendrogram="none",density="none",trace="none",#col=redblue,
col=blue2green,main=paste0("Pairwise Sharing by Opposite Sign"),
cexRow=0.6,cexCol=0.5,cex.main=0.5)
#,breaks=seq(0.7,1,0.01))
absmagheatmap=compute.mag.by.sharing(lfsr.mash = lfsr.mash.sig,thresh = 0.05,pm.mash.beta = abs(pm.mash.sig))
absmagheatmap[lower.tri(absmagheatmap)] <- NA
heatmap.2(absmagheatmap,Rowv=FALSE,Colv=FALSE,
symm=TRUE,dendrogram="none",density="none",trace="none",#col=redblue,
col=blue2green,main=paste0("Pairwise Sharing by abs(Magnitude)"),
cexRow=0.6,cexCol=2,cex.main=0.5,#breaks=seq(0.35,1,0.01),
labCol=NA)
heatmap.2(magheatmap,Rowv=FALSE,Colv=FALSE,
symm=TRUE,dendrogram="none",density="none",trace="none",#col=redblue,
col=blue2green,main=paste0("Pairwise Sharing by Magnitude"),
cexRow=0.6,cexCol=2,cex.main=0.5,#breaks=seq(0.35,1,0.01),
labCol=NA)